# -*- coding: utf-8 -*-
"""
@Time    : 2024/7/10 19:53 
@Author  : ZhangShenao 
@File    : document_splitter.py 
@Desc    : 文档分割器
"""
from langchain_text_splitters import RecursiveCharacterTextSplitter


class DocumentSplitter:
    """Document splitter that breaks long documents into smaller chunks.

    Thin wrapper around ``RecursiveCharacterTextSplitter`` that stores the
    chunking configuration once and applies it on every call to
    :meth:`split_documents`.
    """

    def __init__(self, chunk_size: int, chunk_overlap: int):
        """
        Store the chunking configuration.

        :param chunk_size: size limit passed to the underlying splitter as ``chunk_size``
        :param chunk_overlap: overlap passed to the underlying splitter as ``chunk_overlap``
        :raises ValueError: if ``chunk_overlap`` is larger than ``chunk_size``
        """

        # Fail fast: the underlying splitter rejects this combination with a
        # ValueError as well, but only once split_documents() is first called.
        if chunk_overlap > chunk_size:
            raise ValueError(
                f"chunk_overlap ({chunk_overlap}) must not be larger than "
                f"chunk_size ({chunk_size})"
            )

        # Splitting parameters, reused for every split_documents() call.
        self.__chunk_size = chunk_size
        self.__chunk_overlap = chunk_overlap

    def split_documents(self, documents: list) -> list:
        """
        Split the given documents into smaller chunks.

        :param documents: list of source documents to split
        :return: list of chunks as returned by
            ``RecursiveCharacterTextSplitter.split_documents``
        """

        # A new splitter is built per call from the stored configuration.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.__chunk_size,
            chunk_overlap=self.__chunk_overlap,
        )
        return splitter.split_documents(documents)
